{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Congestion Charges - Medium"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Setting default log level to \"WARN\".\n",
      "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n"
     ]
    }
   ],
   "source": [
    "import findspark\n",
    "import pandas as pd\n",
    "findspark.init()\n",
    "\n",
    "SVR = '192.168.31.31'\n",
    "from pyspark.sql import SparkSession\n",
    "from pyspark.sql.functions import *\n",
    "from pyspark.sql import Window\n",
    "\n",
    "sc = (SparkSession.builder.appName('app18-2') \n",
    "      .master(f'spark://{SVR}:7077') \n",
    "      .config('spark.sql.warehouse.dir', f'hdfs://{SVR}:9000/user/hive/warehouse') \n",
    "      .config('spark.cores.max', '4') \n",
    "      .config('spark.executor.instances', '1') \n",
    "      .config('spark.executor.cores', '2') \n",
    "      .config('spark.executor.memory', '10g') \n",
    "      .enableHiveSupport().getOrCreate())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "camera = sc.read.table('sqlzoo.camera')\n",
    "keeper = sc.read.table('sqlzoo.keeper')\n",
    "vehicle = sc.read.table('sqlzoo.vehicle')\n",
    "image = sc.read.table('sqlzoo.image')\n",
    "permit = sc.read.table('sqlzoo.permit')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1.\n",
    "List the owners (name and address) of Vehicles caught by camera 1 or 18 without duplication."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                                                \r"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>address</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Ambiguous, Arthur</td>\n",
       "      <td>Absorption Ave.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Inconspicuous, Iain</td>\n",
       "      <td>Interception Rd.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Strenuous, Sam</td>\n",
       "      <td>Surjection Street</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  name            address\n",
       "0    Ambiguous, Arthur    Absorption Ave.\n",
       "1  Inconspicuous, Iain   Interception Rd.\n",
       "2       Strenuous, Sam  Surjection Street"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(keeper.withColumnRenamed('id', 'keeper')\n",
    " .join(vehicle, on='keeper')\n",
    " .join(image.filter(image['camera'].isin([1, 18])), \n",
    "       on=(vehicle['id']==image['reg']))\n",
    " .select('name', 'address')\n",
    " .distinct()\n",
    " .toPandas())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.\n",
    "Show keepers (name and address) who have more than 5 vehicles."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>address</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Ambiguous, Arthur</td>\n",
       "      <td>Absorption Ave.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Inconspicuous, Iain</td>\n",
       "      <td>Interception Rd.</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  name           address\n",
       "0    Ambiguous, Arthur   Absorption Ave.\n",
       "1  Inconspicuous, Iain  Interception Rd."
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(keeper.withColumnRenamed('id', 'keeper')\n",
    " .join(vehicle, on='keeper')\n",
    " .groupBy('name', 'address')\n",
    " .agg(count('keeper').alias('keeper'))\n",
    " .filter(col('keeper')>5)\n",
    " .select('name', 'address')\n",
    " .toPandas())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.\n",
    "For each vehicle show the number of current permits (suppose today is the 1st of Feb 2007). The list should include the vehicle.s registration and the number of permits. Current permits can be determined based on charge types, e.g. for weekly permit you can use the date after 24 Jan 2007 and before 02 Feb 2007."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>reg</th>\n",
       "      <th>reg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>SO 02 DSP</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>SO 02 DTP</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>SO 02 JSP</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>SO 02 KSP</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>SO 02 KTP</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>SO 02 QSP</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>SO 02 RSP</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         reg  reg\n",
       "0  SO 02 DSP    1\n",
       "1  SO 02 DTP    1\n",
       "2  SO 02 JSP    1\n",
       "3  SO 02 KSP    1\n",
       "4  SO 02 KTP    1\n",
       "5  SO 02 QSP    1\n",
       "6  SO 02 RSP    1"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(permit.withColumn('sdate', to_timestamp(col('sdate')))\n",
    " .withColumn('edate', when(\n",
    "    col('chargetype')=='Daily', col('sdate') + expr('interval 1 day')).when(\n",
    "    col('chargetype')=='Weekly', col('sdate') + expr('interval 1 week')).when(\n",
    "    col('chargetype')=='Monthly', col('sdate') + expr('interval 1 month')).when(\n",
    "    col('chargetype')=='Annual', col('sdate') + expr('interval 1 year')))\n",
    " .filter((col('sdate') <= '2007-02-01') & (col('edate') >= '2007-02-01'))\n",
    " .groupBy('reg')\n",
    " .agg(count('chargetype').alias('reg'))\n",
    " .orderBy('reg')\n",
    " .toPandas())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4.\n",
    "Obtain a list of every vehicle passing camera 10 on 25th Feb 2007. Show the time, the registration and the name of the keeper if available."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>reg</th>\n",
       "      <th>whn</th>\n",
       "      <th>name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>SO 02 CSP</td>\n",
       "      <td>2007-02-25 07:45:11.0</td>\n",
       "      <td>Ambiguous, Arthur</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>SO 02 ESP</td>\n",
       "      <td>2007-02-25 18:08:40.0</td>\n",
       "      <td>Ambiguous, Arthur</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         reg                    whn               name\n",
       "0  SO 02 CSP  2007-02-25 07:45:11.0  Ambiguous, Arthur\n",
       "1  SO 02 ESP  2007-02-25 18:08:40.0  Ambiguous, Arthur"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(vehicle.join(keeper.withColumnRenamed('id', 'keeper'), on='keeper')\n",
    " .join(image.filter((image['whn'].between('2007-02-25', '2007-02-26')) &\n",
    "                    (image['camera']==10)), on=(vehicle['id']==image['reg']))\n",
    " .select('reg', 'whn', 'name')\n",
    " .toPandas())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5.\n",
    "List the keepers who have more than 4 vehicles and one of them must have more than 2 permits. The list should include the names and the number of vehicles."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>n_vehicle</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Inconspicuous, Iain</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  name  n_vehicle\n",
       "0  Inconspicuous, Iain          7"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# keepers who have more than 2 permits\n",
    "t = (vehicle.join(permit\n",
    "                  .groupBy('reg')\n",
    "                  .agg(count('chargetype').alias('npermit'))\n",
    "                  .filter(col('npermit')>2)\n",
    "                  .select('reg').alias('p'), \n",
    "                  on=(vehicle['id']==col('reg')))\n",
    "    .select('keeper'))\n",
    "\n",
    "(vehicle.join(keeper.withColumnRenamed('id', 'keeper')\n",
    "              .join(t, on='keeper'),\n",
    "              on='keeper')\n",
    " .groupBy('name')\n",
    " .agg(count('id').alias('n_vehicle'))\n",
    " .select('name', 'n_vehicle')\n",
    " .filter(col('n_vehicle')>4)\n",
    " .toPandas())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "sc.stop()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
